#!/bin/bash
# clusterByMainAttr: assign every question to its highest-weighted cluster.
#
# Usage: clusterByMainAttr srcFile mapFile clusterFile
#
#   srcFile      input; whitespace-separated records "<questionId> <clusterId> <weight>"
#   mapFile      output; one "<questionId> <clusterId>" line per question
#   clusterFile  output; one "<clusterId> <questionId>[,<questionId>...]" line per cluster
#
# The order of lines in both output files follows awk's array traversal
# order, which is unspecified.

#######################################
# Build the question->cluster map and the cluster->questions listing.
# Arguments: $1 - source file, $2 - map file, $3 - cluster file
# Outputs:   writes $2 and $3; diagnostics go to stderr
# Returns:   0 on success, 2 on bad usage, 1 on I/O problems,
#            otherwise awk's exit status
#######################################
cluster_by_main_attr() {
    if [[ $# -lt 3 ]]; then
        printf '%s\n' "Usage: clusterByMainAttr srcFile  mapFile clusterFile" >&2
        return 2
    fi

    local src_file=$1
    local map_file=$2
    local cluster_file=$3

    # Check the input before the redirection below truncates the cluster file.
    if [[ ! -r "$src_file" ]]; then
        printf 'clusterByMainAttr: cannot read %s\n' "$src_file" >&2
        return 1
    fi

    # Drop any stale map so that an empty input leaves no map file behind.
    if [[ -f "$map_file" ]]; then
        rm -f -- "$map_file" || return 1
    fi

    awk -v mapFile="$map_file" '
    # Blank lines carry no question id; skip them.
    NF == 0 { next }

    # Pass 1: remember, per question, the cluster with the largest weight.
    # Membership is tested with "in": testing the stored value would treat
    # a cluster id of 0 (or an empty one) as "question not seen yet".
    {
        if (!($1 in ret) || weight[$1] < $3) {
            ret[$1] = $2
            weight[$1] = $3
        }
    }

    # Pass 2: emit the map and group the question ids by chosen cluster.
    END {
        for (qId in ret) {
            clusterId = ret[qId]
            # ">" truncates only on the first write of this awk run.
            print qId, clusterId > mapFile
            if (clusterId in cluster) {
                cluster[clusterId] = cluster[clusterId] "," qId
            } else {
                cluster[clusterId] = clusterId " " qId
            }
        }
        close(mapFile)
        for (cId in cluster) {
            print cluster[cId]
        }
    }' "$src_file" > "$cluster_file"
}

cluster_by_main_attr "$@"
